###### Maintext: Figure 1
###### Taxation Over Time

# Start from a clean session: collect garbage, drop every object in the
# workspace, and fix the random seed.
gc()
rm(list = ls())
set.seed(12345)

# Use the folder containing this script as the working directory so that the
# relative paths used below resolve. This call relies on the RStudio API: if
# you are not using RStudio it will not work, so set the working directory to
# the source file location manually.
setwd(dirname(rstudioapi::getActiveDocumentContext()[["path"]]))


############### SCRIPT SUMMARY #####################

# Packages attached up front (patchwork and gridExtra are attached further
# down, right before they are used).
packages <- c("dplyr", "ggplot2")

# library() stops with an error when a package is not installed, whereas
# require() only returns FALSE and lets the script fail later with a less
# helpful "could not find function" message. invisible() suppresses the
# printed list of results.
invisible(lapply(packages, library, character.only = TRUE))

# Project helpers. The plots below call theme_custom(), which is not defined
# in this file -- presumably it comes from functions.R (verify).
source("functions.R")


################ FIG 1: Taxation over time

## Load data
# The code below uses `dfMerge`, which is not defined in this file, so it is
# presumably restored into the workspace by this load() -- verify.
load("data/cleaned/vat_panel_cleaned15Nov.RData")

dfTax <- dfMerge

## Get some statistics by year

# Columns to average by year and, matched position by position, the labels
# used for them in the plots. Some labels repeat on purpose: e.g. "VAT_i" and
# "tax_gs_vat" are both labelled "Value-Added" but end up in different panels.
useVars <- c("VAT_i", "PIT_i", "CIT_i", "GST_i", "direct", "indirect", "trade",
             "tax_gs_vat", "tax_gs_excises", "tax_gs_general")
useNames <- c("Value-Added", "Personal Income",
              "Corporate Income", "General Services",
              "Direct", "Indirect", "Trade",
              "Value-Added", "Excise", "General Services")

# Coerce every summarised column to numeric, one column at a time.
dfTax[useVars] <- lapply(useVars, function(v) as.numeric(dfTax[[v]]))

# Yearly mean of each variable for years >= 1980, one row per year.
# The temporaries live inside local() so that they do not end up in the
# workspace saved at the end of the script.
dfSum <- local({
  keepRows <- !is.na(dfTax$year) & dfTax$year >= 1980
  yearKept <- dfTax$year[keepRows]

  # Every variable is grouped by the same year vector, so all results share
  # the same length and the same (sorted) year order.
  yearlyMeans <- lapply(useVars, function(v) {
    tapply(dfTax[[v]][keepRows], yearKept, mean, na.rm = TRUE)
  })
  names(yearlyMeans) <- useVars

  # as.vector() strips the year names so the data frame gets default row names.
  out <- as.data.frame(lapply(yearlyMeans, as.vector))

  # Take the year labels from the groups actually present in the data rather
  # than assuming the panel covers exactly 1980:2018.
  out$year <- as.integer(names(yearlyMeans[[1]]))
  out
})

# Reshape the yearly means to long format: one row per (variable, year), with
# all years of the first variable first, then all years of the second, etc.
#   est   - yearly mean of the variable
#   year  - year of the observation
#   var   - column name from useVars
#   label - display label from useNames (matched to useVars by position)
# Built in one vectorised call instead of growing a data frame with rbind()
# inside a 1:length() loop.
dfPlot <- data.frame(
  est = unlist(dfSum[useVars], use.names = FALSE),
  year = rep(dfSum$year, times = length(useVars)),
  var = rep(useVars, each = nrow(dfSum)),
  label = rep(useNames, each = nrow(dfSum))
)

# Variables shown in the revenue panel (y axis: "Revenue Raised as % of GDP").
# Everything in dfPlot that is NOT listed here goes to the prevalence panel.
gdpVars <- c(
  "direct", "indirect", "trade",
  "tax_gs_vat", "tax_gs_excises", "tax_gs_general"
)

# Keep only the rows for those variables.
dfShare <- dfPlot[is.element(dfPlot$var, gdpVars), ]

# Fix the order of the labels (this drives legend order and grey shades).
dfShare$label <- factor(
  dfShare$label,
  levels = c("Indirect", "Direct", "Value-Added",
             "General Services", "Trade", "Excise")
)

# 1 for the two aggregate series, 2 for every other series.
dfShare$line_type <- ifelse(
  is.element(dfShare$label, c("Indirect", "Direct")),
  1,
  2
)

## Define graphical parameters and plot

# Range of the grey scale shared by both panels.
greyStart <- 0.1
greyEnd <- 0.8

# Panel A: yearly mean of the "Indirect" and "Direct" series only.
dfShareSub <- subset(dfShare, label %in% c("Indirect", "Direct"))

pShare <- ggplot() +
  geom_line(
    data = dfShareSub,
    mapping = aes(x = year, y = est, colour = label),
    stat = "identity",
    size = 1.5
  ) +
  theme_custom(
    legend_position = "bottom",
    legend_justification = c(0.5, 0.5)
  ) +
  scale_colour_grey(start = greyStart, end = greyEnd) +
  guides(colour = guide_legend(title = "Tax Type")) +
  labs(
    x = "\nYear",
    y = "Revenue Raised as % of GDP\n",
    title = "Levels of Extraction by Tax Type",
    tag = "A"
  ) +
  # Text labels for the two lines, placed at hand-picked coordinates near the
  # right-hand end of the x axis.
  annotate(
    "text",
    x = 2016,
    y = c(11.4, 8.7),
    label = c("Indirect", "Direct"),
    size = 5
  )

# Panel B: every variable in dfPlot that is not one of the gdpVars.
dfIntro <- dfPlot[!is.element(dfPlot$var, gdpVars), ]

# Fix the order of the labels (this drives legend order and grey shades).
dfIntro$label <- factor(
  dfIntro$label,
  levels = c("Value-Added", "General Services",
             "Corporate Income", "Personal Income")
)

# Complement of the yearly mean (not used by the plot below).
dfIntro$share_no <- 1 - dfIntro$est

pIntro <- ggplot() +
  geom_line(
    data = dfIntro,
    mapping = aes(x = year, y = est, colour = label),
    stat = "identity",
    size = 1.5
  ) +
  theme_custom(
    legend_position = "bottom",
    legend_justification = c(0.5, 0.5)
  ) +
  scale_colour_grey(start = greyStart, end = greyEnd) +
  labs(
    x = "\nYear",
    y = "Share of Countries with Tax\n",
    title = "Tax Prevalence Over Time"
  ) +
  # Four series: lay the legend out on two rows.
  guides(colour = guide_legend(title = "Tax Type", nrow = 2)) +
  labs(tag = "B")

# Show the panel when the script is run interactively.
pIntro
# library() (rather than require()) stops immediately if the package is
# missing instead of returning FALSE and failing on the next line.
library(patchwork)

# Side-by-side preview of the two panels on the current graphics device,
# composed with `+` (composition operator presumably supplied by patchwork).
patchwork <- (pShare + pIntro)
print(patchwork)

library(gridExtra)

# Write the two panels next to each other to the PDF file. The device is
# closed in `finally` so it does not stay open if grid.arrange() fails.
pdf(file = "figures/fig_1.pdf", height = 6.5, width = 12)
invisible(tryCatch(
  grid.arrange(pShare, pIntro, nrow = 1),
  finally = dev.off()
))

# Save every object in the workspace alongside the figure.
save.image(file = "results/fig_1.RData")
